// ---- Session set-up ----
// Start from an empty session, disable -more- pauses, and request memory
// (-set memory- only matters on older Stata releases).

clear all

set more off

set memory 10g

// Everything printed from here on is written to the moving_window log.
log using "\\ead02\ead_uquam\Localization\restat_results\moving_window", replace 

// ---- Load the panel and attach mpa.dta ----

use "\\ead02\ead_uquam\Localization\NAICS6_panel\cdf_all_rhs.dta", clear

// Relative paths used later in this file ("temp/...") resolve against this folder.
cd "\\ead02\ead_uquam\Localization\restat_results"

// Many panel rows match one mpa.dta row per naics-year pair.
// NOTE(review): _merge is dropped without being inspected, so unmatched
// rows from either file stay in the data silently.
sort naics year
merge m:1 naics year using "\\ead02\ead_uquam\Localization\restat_mpa\mpa.dta"
drop _merge


// ---- Restrict the data set by the weight used (options: none, emp, vst) ----

drop if weight != "none"

// ---- Convert naics and oecd80 to numeric ----
// naics is used further down as the numeric panel identifier in -xtset-.

destring naics oecd80, replace

// ---- Derived variables ----

// Aggregated industry codes: naics divided by 10000 and by 1000, rounded down
// (for a 6-digit code these are its first 2 and first 3 digits).
generate naics2 = floor(naics/10000)
generate naics3 = floor(naics/1000)

// Total employment = salemp + prdwrk
generate totalemp = salemp + prdwrk

*generate ln_cdf = ln(cdf)
generate ln_cdf_window = ln(cdf_window)

// ln_av and lnav_klems are two names for the same quantity, ln(av_klems).
foreach newname in ln_av lnav_klems {
    generate `newname' = ln(av_klems)
}

// Plain log transforms. lnnmulti is written out because its source variable
// (nmulti1) does not follow the ln<name> pattern.
generate lnnfown = ln(nfown)
generate lnnmulti = ln(nmulti1)
foreach v in m_emp vsm nrs herfent {
    generate ln`v' = ln(`v')
}

// Employment in levels and logs (lnemp and ln_empl are identical by
// construction, as are empl and totalemp), plus the log share of salemp
// in salemp + prdwrk.
generate lnemp = ln(salemp + prdwrk)
generate ln_empl = ln(salemp + prdwrk)
generate empl = salemp + prdwrk
generate lnemplshr = ln(salemp/(salemp + prdwrk))


// ---- Proxy for labor market pooling (hours worked) ----

// Share of each ifqh component in total ifqh, then the two pooled shares.
foreach k in 2 3 4 {
    generate ifqh`k'shr = ifqh`k'/ifqh
}
generate ifqh34shr = (ifqh3 + ifqh4)/ifqh
generate ifqh23shr = (ifqh2 + ifqh3)/ifqh

// Logs of the shares. The 3+4 log is computed straight from the components
// rather than from the stored ifqh34shr variable.
foreach k in 2 3 4 23 {
    generate lnifqh`k'shr = ln(ifqh`k'shr)
}
generate lnifqh34shr = ln((ifqh3 + ifqh4)/ifqh)

// Squared log shares.
foreach k in 2 3 4 23 {
    generate lnifqh`k'shr2 = lnifqh`k'shr*lnifqh`k'shr
}


// ---- Proxy for R&D intensity: R&D expenditure / total output ----

generate rdlshr = rdl/vsml
foreach v in rdl rdls {
    generate ln`v' = ln(`v'/vsml)
}

// ---- Proxy for natural advantage: pee relative to pvv ----
generate lnpee = ln(pee/pvv)
*gen lnpee =ln(pee_gn)/ln(pvv_gn)
generate peeshr = pee/pvv

// ---- Logs of input and output distance (suffixes n10, n7, n5, n3) ----

foreach n in 10 7 5 3 {
    generate lnl_idist_n`n' = ln(l_idist_n`n')
    generate lnl_odist_n`n' = ln(l_odist_n`n')
}

// ---- Same, for the imputed series (trailing "i") ----

foreach n in 10 7 5 3 {
    generate lnl_idist_n`n'i = ln(l_idist_n`n'i)
    generate lnl_odist_n`n'i = ln(l_odist_n`n'i)
}

// ---- Logs of the minimum distances (suffixes n2, n3, n5, n7, n10) ----
foreach n in 2 3 5 7 10 {
    generate lndistn`n' = ln(distn`n')
}

// Imputed minimum distances.
foreach n in 2 3 5 7 10 {
    generate lndistn`n'i = ln(distn`n'i)
}


// N.B.: The import and export shares enter the estimation UNTRANSFORMED.


// Save the unweighted panel, sorted by naics, year and distance, so that
// each distance window can be re-read from disk further down.
sort naics year distance
save "temp/cdf_all_rhs_noweight_new.dta", replace

// Reload the file just written. -use- accepts the option -clear- (discard
// the data in memory); it has no -replace- option, and specifying one stops
// the do-file with an "option replace not allowed" error.
use "temp/cdf_all_rhs_noweight_new.dta", clear

// ---- First window: distance == 9 ----
// Creates temp/distreg_new.dta holding one row of coefficients and standard
// errors for this window.
keep if distance == 9

xtset naics year, delta(1)

// Residual of the transport variable: fixed-effects regression of lnav_klems
// on lnmfpa and year dummies; xb is -predict-'s default linear prediction,
// and the residual is lnav_klems minus that prediction.
xtreg lnav_klems lnmfpa i.year, fe  cluster(naics)
predict double xb
generate lnav_klems_resid = lnav_klems - xb

// Main regression for the distance == 9 window. The regressor list is held
// in locals only to keep the command readable.
local controls ln_empl lnherfent lnm_emp nmulti1 nfown lnnrs lnpee lnifqh3shr lnrdl
local tradeshr m_asiashr m_oecdshr m_naftashr x_asiashr x_oecdshr x_naftashr
local distvars lnav_klems_resid lnl_idist_n5 lnl_odist_n5 lndistn5

xi: xtreg ln_cdf_window `controls' `tradeshr' `distvars' i.year, fe cluster(naics)

// Store the coefficients of interest and their standard errors.
// coefvars lists the regressors; tags gives the suffix of the scalar that
// holds each one (b_<tag>, se_<tag>). Position k maps to b<k> / s<k>.
local coefvars m_asiashr x_oecdshr lnav_klems_resid lnl_idist_n5 lnl_odist_n5 lndistn5
local tags m_asiashr x_oecdshr lnav_klems lnl_idist_n5 lnl_odist_n5 lndistn5

// b0 records the window; b<k>/s<k> are zero except in observation 1, which
// receives the estimate.
generate b0 = 9
forvalues k = 1/6 {
    local v : word `k' of `coefvars'
    local t : word `k' of `tags'
    scalar b_`t' = _b[`v']
    scalar se_`t' = _se[`v']
    generate b`k' = 0
    replace b`k' = b_`t' in 1
    generate s`k' = 0
    replace s`k' = se_`t' in 1
}

// Reduce to the single result row and the result columns only.

keep in 1
keep b0-b6 s1-s6 
order b0 b1 s1 b2 s2 b3 s3 b4 s4 b5 s5 b6 s6

// Write the results file that the later windows are appended to.

save "temp/distreg_new.dta", replace

// ---- Remaining windows: distance = 19, 29, ..., 799 ----
// Each pass re-reads the saved panel, repeats the two regressions for one
// window, reduces the data to one result row, appends the rows collected so
// far and overwrites temp/distreg_new.dta. After the final pass the data in
// memory is the complete results table.

// Regressor list for the main regression (split only for readability).
local controls ln_empl lnherfent lnm_emp nmulti1 nfown lnnrs lnpee lnifqh3shr lnrdl
local tradeshr m_asiashr m_oecdshr m_naftashr x_asiashr x_oecdshr x_naftashr
local distvars lnav_klems_resid lnl_idist_n5 lnl_odist_n5 lndistn5

// Regressors whose estimates are kept, and the suffix of the scalar holding
// each (b_<tag>, se_<tag>). Position k maps to b<k> / s<k>.
local coefvars m_asiashr x_oecdshr lnav_klems_resid lnl_idist_n5 lnl_odist_n5 lndistn5
local tags m_asiashr x_oecdshr lnav_klems lnl_idist_n5 lnl_odist_n5 lndistn5

forvalues d = 19(10)799 {

    use "temp/cdf_all_rhs_noweight_new.dta", clear

    keep if distance == `d'

    set more off

    xtset naics year, delta(1)

    // Residual of the transport variable for this window.
    xtreg lnav_klems lnmfpa i.year, fe  cluster(naics)
    predict double xb
    generate lnav_klems_resid = lnav_klems - xb

    // Main regression for this window.
    xi: xtreg ln_cdf_window `controls' `tradeshr' `distvars' i.year, fe cluster(naics)

    // b0 records the window; observation 1 receives each estimate.
    generate b0 = `d'
    forvalues k = 1/6 {
        local v : word `k' of `coefvars'
        local t : word `k' of `tags'
        scalar b_`t' = _b[`v']
        scalar se_`t' = _se[`v']
        generate b`k' = 0
        replace b`k' = b_`t' in 1
        generate s`k' = 0
        replace s`k' = se_`t' in 1
    }

    // Result columns only, then the single result row.
    keep b0-b6 s1-s6
    keep in 1
    order b0 b1 s1 b2 s2 b3 s3 b4 s4 b5 s5 b6 s6

    // The new row goes on top of the rows already stored on disk.
    append using "temp/distreg_new.dta"

    save "temp/distreg_new.dta", replace

}

// 90 percent confidence bands (10 percent two-sided significance level):
// estimate -/+ 1.6449 standard errors, for each of the six coefficients.
forvalues k = 1/6 {
    generate b`k'_lb = b`k' - 1.6449*s`k'
    generate b`k'_ub = b`k' + 1.6449*s`k'

}

// graph

*replace b0 = b0 + 9

// Overwrite the results file so that it also carries the bands.
save "temp/distreg_new.dta", replace

// ---- Coefficient plots by distance window ----
// Each plot draws a coefficient and its lower/upper band against b0 (the
// window), restricted to the b0 range given in the -if- condition.
// The graph is written with -graph save-. Plain -save- stores the DATASET,
// so it would write the results table into a file named *.gph and never
// store the graph itself.

// Asia share of imports: coefficient by distance
twoway (connected b1 b1_lb b1_ub b0 if b0<=200, lcolor(black))
graph save "\\ead02\ead_uquam\Localization\restat_results\coef_by_moving_window\asiashrcoeffw.gph", replace

// OECD share of exports: coefficient by distance
twoway (connected b2 b2_lb b2_ub b0 if b0<=500, lcolor(black))
graph save "\\ead02\ead_uquam\Localization\restat_results\coef_by_moving_window\oecdshrcoeffw.gph", replace

// Transportation cost: coefficient by distance
twoway (connected b3 b3_lb b3_ub b0 if b0<=150, lcolor(black))
graph save "\\ead02\ead_uquam\Localization\restat_results\coef_by_moving_window\transportcoeffw_rev1.gph", replace

// Input distance: coefficient by distance
twoway (connected b4 b4_lb b4_ub b0 if b0<=200, lcolor(black))
graph save "\\ead02\ead_uquam\Localization\restat_results\coef_by_moving_window\inputcoeffw.gph", replace

// Output distance: coefficient by distance
twoway (connected b5 b5_lb b5_ub b0 if b0<=200, lcolor(black))
graph save "\\ead02\ead_uquam\Localization\restat_results\coef_by_moving_window\outputcoeffw.gph", replace

// Minimum distance: coefficient by distance
twoway (connected b6 b6_lb b6_ub b0 if b0<=200, lcolor(black))
graph save "\\ead02\ead_uquam\Localization\restat_results\coef_by_moving_window\mindistcoeffw.gph", replace


// Close the log opened at the top of the do-file.
log close
